# optimizer
optimizer = dict(type='SGD', lr=0.003, momentum=0.9, weight_decay=0.0005) #0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[16])
total_epochs = 22
